---
title: Analyse customers
keywords: fastai
sidebar: home_sidebar
summary: "transactions per user,"
description: "transactions per user,"
nb_path: "nbs/04_data_analyse_customers.ipynb"
---
from customer_segmentation_toolkit.data.load_split import load_data_csv
# Directory that the input files below are read from.
DATA = '../data/output/03_data_compute_description_keywords'

# The whole content of the text file is parsed as a single integer.
with open(f'{DATA}/n_purchase_clusters.txt', 'r') as count_file:
    raw_cluster_count = count_file.read()
N_PURCHASE_CLUSTERS = int(raw_cluster_count)
logging.info(f'N_PURCHASE_CLUSTERS={N_PURCHASE_CLUSTERS}')

# Load the training CSV through the toolkit loader.
basket_price = load_data_csv(
    f'{DATA}/no_live_data__cleaned__purchase_clusters__train.csv'
)
# Bare expression: notebook-style preview of the first rows.
basket_price.head()
# Build the per-user transaction table from the basket data.
# NOTE(review): presumably one row per customer — confirm against
# build_transactions_per_user.
transactions_per_user = build_transactions_per_user(
    basket_price,
    n_purchase_clusters=N_PURCHASE_CLUSTERS,
)
# Bare expression: notebook-style preview of the first rows.
transactions_per_user.head()

# n1: value returned by the "unique purchase" helper; n2: number of rows,
# which the printed message treats as the total number of customers.
n1 = compute_n_customers_with_unique_purchase(transactions_per_user)
n2 = transactions_per_user.shape[0]
unique_purchase_pct = n1 / n2 * 100
print(
    "nb. de clients avec achat unique: {:<2}/{:<5} ({:<2.2f}%)".format(
        n1, n2, unique_purchase_pct
    )
)
# Turn the per-user table into the matrix consumed by the PCA helpers, then
# run and plot the PCA. `analyse_customers_pca` returns the scaled matrix
# together with the fitted PCA object.
matrix = convert_customers_df_to_np(transactions_per_user, N_PURCHASE_CLUSTERS)
scaled_matrix, pca = analyse_customers_pca(matrix)
plot_customers_pca(matrix, pca)

# Number of customer clusters requested from the clustering helper.
N_CUSTOMER_CLUSTERS = 11
clusters_clients = compute_customer_clusters(scaled_matrix, N_CUSTOMER_CLUSTERS)

# Print how many customers fall into each cluster as a one-row table.
# `to_frame(name)` is used instead of `pd.DataFrame(series, columns=[name])`:
# on pandas >= 2.0 `value_counts()` returns a Series named 'count', and the
# DataFrame constructor would then look up the non-existent column
# 'nb. de clients' and produce an empty frame.
cluster_sizes = pd.Series(clusters_clients).value_counts()
print(cluster_sizes.to_frame('nb. de clients').T)

# Average silhouette score of the clustering over the scaled matrix.
silhouette_avg = silhouette_score(scaled_matrix, clusters_clients)
print('score de silhouette: {:<.3f}'.format(silhouette_avg))
from customer_segmentation_toolkit.data.analyse_purchases import plot_silhouette
# Per-sample silhouette values for the customer clustering.
sample_silhouette_values = silhouette_samples(scaled_matrix, clusters_clients)

# NOTE(review): presumably the x-axis limits of the silhouette plot —
# confirm against plot_silhouette.
silhouette_bounds = [-0.15, 0.55]
n_samples = len(scaled_matrix)
plot_silhouette(
    N_CUSTOMER_CLUSTERS,
    silhouette_bounds,
    n_samples,
    sample_silhouette_values,
    clusters_clients,
)

plot_customer_categories(scaled_matrix, clusters_clients, N_CUSTOMER_CLUSTERS)
Original cells 59-61:
# Attach the customer-cluster assignment to the per-user table.
selected_customers_df = add_customer_clusters_info(
    transactions_per_user,
    clusters_clients,
)
# Bare expression: notebook-style display of the frame.
selected_customers_df

# Aggregate the per-user rows by customer cluster.
merged_df = compute_aggregated_customer_clusters_info(
    selected_customers_df,
    N_PURCHASE_CLUSTERS,
    N_CUSTOMER_CLUSTERS,
)
# The 'size' column is summed to report the total number of customers.
total_customers = merged_df['size'].sum()
print('number of customers:', total_customers)
# Bare expression: notebook-style display of the frame.
merged_df